# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 07_attrition_follow_up.R
### This script conducts follow-up attrition analyses.
# ----
# Follow-up sample descriptives ----
follow_up_summary_tab <- bimli_svy_dkr.rm %>%
  mutate(
    follow_status = case_when(
      follow_reached == 1 ~ "Called, part.",
      follow_reached == 0 ~ "Called, not part.",
      is.na(follow_reached) ~ "Not called"
    ) %>% factor(levels = c(
      "Called, part.",
      "Called, not part.",
      "Not called"))
  ) %>%
  select(
    follow_status,
    treatment, gender, class, age, religion_hindu_num, language_hindu_num, 
    asset_index, fathers_education, mothers_education, school_gov_num, 
    mobile_internet_num, trust_newspapers_num, trust_social_media_num, 
    trust_tv_num, trust_friends_family_num, vaccinated_num, ayurveda_effective_num
  ) %>%
  tbl_svysummary(
    by = follow_status,  # Set follow_status as the grouping variable for columns
    type = c(
      where(is.numeric) ~ "continuous",
      where(is.factor) ~ "categorical",
      c(
        "religion_hindu_num", "language_hindu_num", "school_gov_num", 
        "mobile_internet_num", "trust_newspapers_num", "trust_social_media_num", 
        "trust_tv_num", "trust_friends_family_num", "vaccinated_num", 
        "ayurveda_effective_num"
      ) ~ "dichotomous"
    ),  # Set specific indicator variables as dichotomous
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",  # Mean (SD) for continuous variables
      all_categorical() ~ "{n} ({p}%)",    # Count (Percentage) for categorical variables
      all_dichotomous() ~ "{p}%"           # Percentage for indicator variables
    ),
    label = list(
      treatment = "Treatment",
      gender = "Gender",
      class = "Grade",
      age = "Age",
      religion_hindu_num = "Religion - Hindu",
      language_hindu_num = "Language - Hindi",
      asset_index = "Asset Index",
      fathers_education = "Father's Education",
      mothers_education = "Mother's Education",
      school_gov_num = "Government School",
      mobile_internet_num = "Has Mobile Internet",
      trust_newspapers_num = "Trust Newspapers",
      trust_social_media_num = "Trust Social Media",
      trust_tv_num = "Trust TV",
      trust_friends_family_num = "Trust Friends",
      vaccinated_num = "Vaccinated",
      ayurveda_effective_num = "Ayurveda Effective"
    ),
    missing = "no"  # Exclude missing
  ) %>%
  add_overall() %>%
  modify_header(all_stat_cols() ~ "**{level}**\n*N = {n}*") %>%
  as_kable_extra(
    format = "latex",
    caption = "Second endline sample characteristics",
    label = "tab_follow_up_sample_descr"
  ) %>%
  kable_styling(latex_options = c("scale_down"))

writeLines(follow_up_summary_tab, "output/tables/tab_follow_up_sample_descr.tex")

# Predictors of follow-up attrition ----
attrition_follow_predictors <- list(
  list(iv = "treatment", label = "Treatment", type = "Condition"),
  list(iv = "gender", label = "Gender", type = "Demographic"),
  list(iv = "class", label = "Grade", type = "Demographic"),
  list(iv = "age", label = "Age", type = "Demographic"),
  list(iv = "religion_hindu_num", label = "Religion - Hindu", type = "Demographic"),
  list(iv = "language_hindu_num", label = "Language - Hindi", type = "Demographic"),
  list(iv = "asset_index", label = "Asset Index", type = "Demographic"),
  list(iv = "fathers_education", label = "Father's Education", type = "Education"),
  list(iv = "mothers_education", label = "Mother's Education", type = "Education"),
  list(iv = "school_gov_num", label = "Government School", type = "Education"),
  list(iv = "science", label = "Science Knowledge", type = "Education"),
  list(iv = "mobile_internet_num", label = "Mobile Internet", type = "Demographic"),
  list(iv = "trust_newspapers_num", label = "Newspapers", type = "Trust"),
  list(iv = "trust_social_media_num", label = "Social Media", type = "Trust"),
  list(iv = "trust_tv_num", label = "TV", type = "Trust"),
  list(iv = "trust_friends_family_num", label = "Friends and Family", type = "Trust"),
  list(iv = "vaccinated_num", label = "Vaccinated", type = "Trust"),
  list(iv = "ayurveda_effective_num", label = "Ayurveda", type = "Trust"),
  list(iv = "awareness", label = "Awareness", type = "Endline Outcome"),
  list(iv = "accuracy_discernment", label = "Accuracy Discernment", type = "Endline Outcome"),
  list(iv = "sharing_discernment", label = "Sharing Discernment", type = "Endline Outcome"),
  list(iv = "health_preferences", label = "Health Preferences", type = "Endline Outcome"),
  list(iv = "source_discernment", label = "Source Discernment", type = "Endline Outcome"),
  list(iv = "engagement_attitude", label = "Engagement Attitude", type = "Endline Outcome"),
  list(iv = "engagement_behavior", label = "Engagement Behavior", type = "Endline Outcome"))

attrition_follow_predictors <- rbindlist(attrition_follow_predictors, 
                                         fill = TRUE, use.names = TRUE)
attrition_follow_predictors <- data.frame(attrition_follow_predictors)

# function to tabulate regressions for each dv, with baseline controls
tab_regression_follow_attrition <- function(dv, iv, design) {
  fml <- as.formula(paste(dv, "~", iv, "+ library_spillover_pre"))  
  mod <- svyglm(fml, design = design)
  out <- tidy(mod) %>%
    mutate(
      dv = dv, 
      iv = iv,
      n = nobs(mod),
      .before = term
    )
  out
}

regressions_follow_attrition <- lapply(attrition_follow_predictors$iv, function(iv) {
  tab_regression_follow_attrition("follow_reached", iv, bimli_svy_dkr.rm)
}) %>%
  bind_rows() %>%
  mutate(
    margin = qnorm(0.975) * std.error,
    lower = estimate - margin,
    upper = estimate + margin
  ) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term))

regressions_follow_attrition <- left_join(attrition_follow_predictors, regressions_follow_attrition, by = "iv") %>%
  rowwise() %>%
  mutate(
    label_rec = case_when(grepl(iv, term) & nchar(term) > nchar(iv) ~ paste(label, "-", sub(iv, "", term)),
                          TRUE ~ label)) %>%
  ungroup() %>%
  mutate(
    label_rec = str_replace(label_rec, "- Media Literacy", ""),
    label_rec = str_replace(label_rec, " - 0", "")
  )

## Plot ----
regressions_follow_attrition %>%
  mutate(significant = if_else(p.value <= 0.05, "Yes", "No")) %>%
  ggplot(aes(x = estimate, y = fct_rev(label_rec), color = significant)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point() +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0) +
  facet_grid(rows = "type", scales = "free_y", space = "free_y", switch = "y") +
  xlab("Estimated effect on P(participate in second endline survey | called)") +
  scale_y_discrete(position = "right") +
  scale_color_manual(values = c("black", "red")) +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "none")
ggsave("output/figures/tab_attrition_follow_predictors.pdf", height = 5, width = 8)

## Table ----
regressions_follow_attrition %>%
  #filter(idx == type & secondary_outcome == FALSE) %>% # filter to variables that are included in the index
  select(label_rec, n, estimate, std.error, p.value) %>%
  mutate(n = format(n, big.mark = ","),
         sig.stars = case_when(p.value < 0.001 ~ "***",
                               p.value < 0.01 ~ "**",
                               p.value < 0.05 ~ "*",
                               TRUE ~ ""),
         estimate = str_c(format(round(estimate, 2), nsmall = 2), sig.stars), # add significance stars to estimate
         std.error = round(std.error, 3),
         p.value = case_when(p.value > 0.9 ~ "$>$0.9",
                             p.value < 0.001 ~ "$<$0.001",
                             p.value < 0.01 ~ as.character(round(p.value, 3)),
                             TRUE ~ as.character(round(p.value, 2)))) %>%
  select(-sig.stars) %>%
  rename(Predictor = label_rec,
         N = n,
         Estimate = estimate,
         SE = std.error,
         "p-value" = p.value) %>%
  xtable::xtable(caption = "Second endline attrition predictors", align = "llllll", digits = 3,
                 label = "tab:attrition_follow") %>%
  print(include.rownames = FALSE, 
        caption.placement = "top",
        sanitize.text.function = identity, # add backslashes to special characters
        sanitize.colnames.function = \(x) str_c("\\textbf{", x, "}"),
        add.to.row = list(pos = list(nrow(.)), 
                          command = paste(
                            "\\hline\n\\multicolumn{5}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include library-spillover strata FEs.} \\\\\n")),
        hline.after = c(-1,0), # add horizontal lane after second to last column
        comment = FALSE,
        table.placement = getOption("xtable.table.placement", "h!"),
        file = paste0("output/tables/tab_attrition_follow", ".tex"))


# Predictors by condition ----
# function to tabulate regressions for each dv, with baseline controls
tab_regression_attrition_follow_int <- function(dv, iv, design) {
  fml <- as.formula(paste(dv, "~", iv, "+", "treatment +", iv, "* treatment", "+ library_spillover_pre"))  
  mod <- svyglm(fml, design = design)
  out <- tidy(mod) %>%
    mutate(
      dv = dv, 
      iv = iv,
      n = nobs(mod),
      .before = term
    )
  out
}

attrition_follow_predictors_int <- attrition_follow_predictors %>%
  filter(iv != "treatment")

regressions_attrition_follow_int <- lapply(attrition_follow_predictors_int$iv, function(iv) {
  tab_regression_attrition_follow_int("follow_reached", iv, bimli_svy_dkr.rm)
}) %>%
  bind_rows() %>%
  mutate(
    margin = qnorm(0.975) * std.error,
    lower = estimate - margin,
    upper = estimate + margin
  ) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term) &
           grepl(":treatment", term))

regressions_attrition_follow_int <- left_join(attrition_follow_predictors_int, regressions_attrition_follow_int, by = "iv") %>%
  rowwise() %>%
  mutate(
    label_rec = case_when(grepl(iv, term) & nchar(term) > nchar(iv) + nchar(":treatmentMedia Literacy") ~ paste(label, "-", sub(iv, "", term)),
                          TRUE ~ label),
    label_rec = sub(":treatmentMedia Literacy", "",label_rec),
    label_rec = paste0(label_rec, " * T")) %>%
  ungroup()

## Plot ----
regressions_attrition_follow_int %>%
  mutate(significant = if_else(p.value <= 0.05, "Yes", "No")) %>%
  ggplot(aes(x = estimate, y = fct_rev(label_rec), color = significant)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point() +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0) +
  facet_grid(rows = "type", scales = "free_y", space = "free_y", switch = "y") +
  xlab("Estimated effect on P(participate in second endline survey | called)") +
  scale_y_discrete(position = "right") +
  scale_color_manual(values = c("black", "red")) +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "none")
ggsave("output/figures/attrition_follow_predictors_interaction.pdf", height = 5, width = 8)


## Table ----
regressions_attrition_follow_int %>%
  select(label_rec, n, estimate, std.error, p.value) %>%
  mutate(n = format(n, big.mark = ","),
         sig.stars = case_when(p.value < 0.001 ~ "***",
                               p.value < 0.01 ~ "**",
                               p.value < 0.05 ~ "*",
                               TRUE ~ ""),
         estimate = str_c(format(round(estimate, 2), nsmall = 2), sig.stars), # add significance stars to estimate
         std.error = round(std.error, 3),
         p.value = case_when(p.value > 0.9 ~ "$>$0.9",
                             p.value < 0.001 ~ "$<$0.001",
                             p.value < 0.01 ~ as.character(round(p.value, 3)),
                             TRUE ~ as.character(round(p.value, 2)))) %>%
  select(-sig.stars) %>%
  rename(Predictor = label_rec,
         N = n,
         Estimate = estimate,
         SE = std.error,
         "p-value" = p.value) %>%
  xtable::xtable(caption = "Second endline attrition predictors (*treatment)", 
                 align = "llllll", digits = 3,
                 label = "tab:attrition_follow_int") %>%
  print(include.rownames = FALSE, 
        caption.placement = "top",
        sanitize.text.function = identity, # add backslashes to special characters
        sanitize.colnames.function = \(x) str_c("\\textbf{", x, "}"),
        add.to.row = list(pos = list(nrow(.)), 
                          command = paste(
                            "\\hline\n\\multicolumn{5}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include library-spillover strata FEs.} \\\\\n")),
        hline.after = c(-1,0), # add horizontal lane after second to last column
        comment = FALSE,
        table.placement = getOption("xtable.table.placement", "h!"),
        file = paste0("output/tables/tab_attrition_follow_int", ".tex"))

# END of 07_attrition_follow_up.R ----